## ----------------------------------------------------------
## Code to clean Bukoba study Qual 2 transcripts for Analysis
## ----------------------------------------------------------

require(readxl, quietly = TRUE)
require(tidyverse, quietly = TRUE)
require(lubridate, quietly = TRUE)
require(stringi, quietly = TRUE)

## Load data
## Read the reconciled transcript coding file, then:
##  * drop rows with a missing school name or respondent type (per the
##    original note, rows where the enumerator made no notes or comments);
##  * build VALUE: the YY coding where the AW and Upworker codings disagree
##    (mismatch == 1), the AW coding where they agree (mismatch == 0).
##    Rows whose mismatch flag is neither 0 nor 1 (including NA) get NA.
qual2_orig <- read_csv("../data/bukobaqual2/Twaweza_SchoolTranscribing_Reconcile.csv")
qual2_orig <- drop_na(qual2_orig, schoolname, respondenttype)
qual2_orig <- mutate(
  qual2_orig,
  VALUE = case_when(
    mismatch == 1 ~ VALUE_YY,
    mismatch == 0 ~ VALUE_AW
  )
)

## Build `value`: for the four questions listed below (presumably free-text
## items -- confirm) keep the `answer` column; for every other question keep
## the reconciled VALUE.
## NOTE(review): the wide reshape further down selects VALUE, not `value`, so
## this override does not reach the wide table -- confirm that is intended.
qual2_orig <- qual2_orig %>%
  mutate(
    value = case_when(
      question_var2 %in% c(
        "teach_subjects",
        "questions_for_interviewer",
        "note1",
        "additional_engage_parents"
      ) ~ answer,
      TRUE ~ VALUE
    )
  )

## Reshape: long (one row per response) -> wide (one row per school-speaker,
## one column per question_var2).
## NOTE(review): the cells are filled from VALUE; the `value` column built
## above (with the `answer` override) is not used here -- confirm.
qual2_long <- qual2_orig %>%
  dplyr::select(schoolname, respondenttype, question_var2, VALUE)

qual2 <- qual2_long %>%
  spread(key = question_var2, value = VALUE)

## Convert variables to correct classes
# to numeric
# Columns of `qual2` that are coerced to numeric. Entries that cannot be
# parsed as numbers become NA (as.numeric() warns when that happens).
cols.num <- c("actions_parent_group", 
              "change_edu_outcomes",
              "change_edu_resources",
              "contributions_parents",
              "different_par_stan5",
              "eduquality_parent_effect",
              "help_parents",
              "parent_attend_speakup_3",
              "parent_attend_speakup_4",
              "parent_interactions_2yrs",
              "parent_type",
              "parentmeeting_last_1",
              "parentmeeting_last_2",
              "past_study_students",
              "response_localgov",
              "study_2016",
              "study_reaction_classteacher",
              "study_reaction_parent",
              "study_reaction_teacher",
              "test_performance_stan5",
              "unique_school",
              "years_head_teacher",
              "years_teacher")

# lapply() always returns one list element per column, so the assignment is
# column-by-column whatever the number of rows. sapply() simplifies its result
# (a matrix for many rows, a bare vector for a single row), which makes the
# assignment depend on the shape of the data.
qual2[cols.num] <- lapply(qual2[cols.num], as.numeric)

# Inspect the resulting column classes (auto-printed when run interactively).
sapply(qual2, class)

# to percentage and date
# The two parent_attend_speakup percentage columns: strip any "%" sign,
# convert to numeric and divide by 100 so they are stored as proportions.
# date_interview: parse month/day/two-digit-year text into a Date.
qual2 <- mutate(
  qual2,
  parent_attend_speakup_2 =
    as.numeric(gsub("\\%", "", parent_attend_speakup_2)) / 100,
  parent_attend_speakup_1 =
    as.numeric(gsub("\\%", "", parent_attend_speakup_1)) / 100,
  date_interview = as.Date(date_interview, "%m/%d/%y")
)

## Transform variables
# Financial vs non-financial parent contributions, derived from
# contributions_parents. Per the original notes the coding is presumably
# 1 = financial, 2 = non-financial -- confirm against the codebook.
# Any other value (including NA) is carried over unchanged in all three.
qual2 <- qual2 %>%
  mutate(
    # only 1 if financial: code 2 is recoded to 0
    contributions_parents_financial = case_when(
      contributions_parents == 2 ~ 0,
      TRUE ~ contributions_parents
    ),
    # only 1 if nonfinancial: code 1 is recoded to 0, code 2 to 1
    contributions_parents_other = case_when(
      contributions_parents == 1 ~ 0,
      contributions_parents == 2 ~ 1,
      TRUE ~ contributions_parents
    ),
    # 1 if either financial or nonfinancial: code 2 is recoded to 1
    contributions_parents_any = case_when(
      contributions_parents == 2 ~ 1,
      TRUE ~ contributions_parents
    )
  )

# respondent type
# Collapse respondent type to three groups: "Head Teacher" and
# "Research Assistant" are kept as they are; every other value is labelled
# "Class Teacher".
qual2 <- qual2 %>%
  mutate(
    respondenttype2 = case_when(
      respondenttype %in% c("Head Teacher", "Research Assistant") ~ respondenttype,
      TRUE ~ "Class Teacher"
    )
  )

## Treatment status
# School names in each study arm. Matching against `schoolname` is exact
# (case- and whitespace-sensitive).

# Survey Only arm
SOschs <- c("BUTAINAMWA PRIMARY SCHOOL",
            "KABIRIZI PRIMARY SCHOOL",
            "KASHARU PRIMARY SCHOOL",
            "KIIJONGO PRIMARY SCHOOL",
            "MASHULE PRIMARY SCHOOL",
            "NYAKATO PRIMARY SCHOOL",
            "OMUKARAMA PRIMARY SCHOOL",
            "RUTETE PRIMARY SCHOOL")

# Info Workshop arm
IWschs <- c("KALEMA PRIMARY SCHOOL",
            "KASHOZI PRIMARY SCHOOL",
            "KATALE PRIMARY SCHOOL",
            "KATOJU PRIMARY SCHOOL",
            "KIHUMULO PRIMARY SCHOOL",
            "KYAMULAILE PRIMARY SCHOOL",
            "KYENGE PRIMARY SCHOOL",
            "RUHUNGA PRIMARY SCHOOL")

# Validated Participation arm
VPschs <- c("BUJUGO PRIMARY SCHOOL",
            "KABAGARA PRIMARY SCHOOL",
            "KATUNGA PRIMARY SCHOOL",
            "KATWE PRIMARY SCHOOL",
            "KIKOMELO PRIMARY SCHOOL",
            "MUSIRA PRIMARY SCHOOL",
            "NGARAMA PRIMARY SCHOOL",
            "RUSHAKA PRIMARY SCHOOL")

# Map a vector of school names to its treatment-arm label.
# Returns a character vector the same length as `schoolname`; schools that
# appear in none of the three lists get NA. Defined once so the wide, original
# and long tables below are all labelled by the same rule.
assign_treat <- function(schoolname) {
  case_when(schoolname %in% SOschs ~ "Survey Only",
            schoolname %in% IWschs ~ "Info Workshop",
            schoolname %in% VPschs ~ "Validated Participation")
}

# Surface school names that match no arm (e.g. a spelling variant), since
# those rows would otherwise get an NA treatment without any notice.
unmatched_schs <- unique(
  qual2_orig$schoolname[!(qual2_orig$schoolname %in% c(SOschs, IWschs, VPschs))]
)
if (length(unmatched_schs) > 0) {
  warning("No treatment arm found for school(s): ",
          paste(unmatched_schs, collapse = ", "),
          call. = FALSE)
}

# Wide table: arm label, short factor code and 0/1 indicators.
qual2$treat <- assign_treat(qual2$schoolname)

# Short arm code as a factor ordered SO < IW < VP; NA where treat is NA.
qual2$TreatCat <- factor(qual2$treat,
                         levels = c("Survey Only",
                                    "Info Workshop",
                                    "Validated Participation"),
                         labels = c("SO",
                                    "IW",
                                    "VP"))

# The indicators below are NA where treat is NA.
qual2$VP <- ifelse(qual2$treat == "Validated Participation", 1, 0) #indicator for VP

qual2$IW <- ifelse(qual2$treat == "Info Workshop", 1, 0) #indicator for IW

qual2$Mtg <- ifelse(qual2$treat == "Info Workshop" |
                    qual2$treat == "Validated Participation", 1, 0) #indicator for VP or IW

# Same arm label on the original and the long tables.
qual2_orig$treat <- assign_treat(qual2_orig$schoolname)

qual2_long$treat <- assign_treat(qual2_long$schoolname)


